Consider the cohort as a single group to start. Do not stratify by case-control status.

Stack the heatmaps for CD4. Are there obvious patterns to the antigens?
Stack the heatmaps for NOTCD4. Are there patterns? Are these patterns different from CD4?

Calculate FS/PFS for each stimulation in CD4 and compare side-by-side in boxplots and test by ANOVA. Is there a clear winner? Calculate FS/PFS for each stimulation in NOTCD4 and compare with boxplot and ANOVA. Is there a winner? Is this different than CD4?

It may also make sense to make a corrplot of the FS/PFS from the different stims. This is asking a different question though, i.e. whether a patient’s cytokine responses tend to be consistently elevated/not-elevated across multiple stims.

Regress FS/PFS against age (linear). Is there an effect of age on T cell functionality? Regress FS/PFS against days from symptom onset (linear). Is there a decrease in functionality with time? Stratify FS/PFS by sex and test by t-test (or Mann-Whitney, though sample size is large enough). Is there a difference?

Only then would you start asking the case-control questions.

library(here)
library(tidyverse)
library(COMPASS)
library(ggplot2)
library(data.table)
library(broom)
library(psych)
library(corrplot)
library(ggpubr)
library(knitr)
library(kableExtra)
library(cowplot)
library(purrr)
library(flowWorkspace)
source(here::here("scripts/my_pheatmap_and_plotMeanGamma.R"))
source(here::here("scripts/20200604_Helper_Functions.R"))
save_output <- FALSE

Read in data

# Next 4 lines from Run_COMPASS.R
stims_for_compass_runs <- c("VEMP", "Spike 1", "Spike 2", "NCAP")
parent_nodes_for_compass_runs <- c("4+", "NOT4+", "8+")
stims_for_compass_runs_rep <- rep(stims_for_compass_runs, each = length(parent_nodes_for_compass_runs))
parent_nodes_for_compass_runs_rep <- rep(parent_nodes_for_compass_runs, times = length(stims_for_compass_runs))

crList <- purrr::pmap(.l = list(parent_nodes_for_compass_runs_rep,
                                stims_for_compass_runs_rep),
                      .f = function(parent, currentStim) {
                        currentStimForFile <- gsub(" ", "_", currentStim)
                        crPath <- here::here(sprintf("out/CompassOutput/%s/%s/COMPASSResult_%s_%s.rds", parent, currentStimForFile, parent, currentStimForFile))
                        readRDS(crPath)
                      }) %>% 
  set_names(gsub("+", "", gsub(" ", "_", paste0(parent_nodes_for_compass_runs_rep, "_", stims_for_compass_runs_rep)), fixed=TRUE))
names(crList)
##  [1] "4_VEMP"       "NOT4_VEMP"    "8_VEMP"       "4_Spike_1"    "NOT4_Spike_1"
##  [6] "8_Spike_1"    "4_Spike_2"    "NOT4_Spike_2" "8_Spike_2"    "4_NCAP"      
## [11] "NOT4_NCAP"    "8_NCAP"
CD4RunNames <- c("4_Spike_1", "4_Spike_2", "4_NCAP", "4_VEMP")
NotCD4RunNames <- c("NOT4_Spike_1", "NOT4_Spike_2", "NOT4_NCAP", "NOT4_VEMP")
CD8RunNames <- c("8_Spike_1", "8_Spike_2", "8_NCAP", "8_VEMP")

Stacked heatmaps

plotCOMPASSResultStack(crList[CD4RunNames],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[NotCD4RunNames],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "Not-CD4 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[CD8RunNames],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD8 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames)],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4 and Not-CD4 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[c(CD4RunNames, CD8RunNames)],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4 and CD8 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa

plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames, CD8RunNames)],
                       row_annotation = c("Stim"),
                       variable = "Stim",
                       show_rownames = FALSE,
                       main = "CD4, Not-CD4, and CD8 COMPASS Runs",
                       fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 7 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&!IL17a&IL2&IL4/5/13&TNFa

if(save_output) {
  png(filename=here::here("out/PostCompassPlots/20200805_CD4_COMPASS_Heatmap_All_Stims.png"),
      width=700, height=1000)
  plotCOMPASSResultStack(crList[CD4RunNames],
                         row_annotation = c("Stim"),
                         variable = "Stim",
                         show_rownames = FALSE,
                         main = "CD4 COMPASS Runs",
                         fontsize = 14, fontsize_row = 13, fontsize_col = 13)
  dev.off()
  
  png(filename=here::here("out/PostCompassPlots/20200805_NotCD4_COMPASS_Heatmap_All_Stims.png"),
      width=700, height=1000)
  plotCOMPASSResultStack(crList[NotCD4RunNames],
                         row_annotation = c("Stim"),
                         variable = "Stim",
                         show_rownames = FALSE,
                         main = "Not-CD4 COMPASS Runs",
                         fontsize = 14, fontsize_row = 13, fontsize_col = 13)
  dev.off()
  
  png(filename=here::here("out/PostCompassPlots/20200805_CD8_COMPASS_Heatmap_All_Stims.png"),
    width=700, height=1000)
  plotCOMPASSResultStack(crList[CD8RunNames],
                         row_annotation = c("Stim"),
                         variable = "Stim",
                         show_rownames = FALSE,
                         main = "CD8 COMPASS Runs",
                         fontsize = 14, fontsize_row = 13, fontsize_col = 13)
  dev.off()
  
  png(filename=here::here("out/PostCompassPlots/20200805_CD4_and_NotCD4_COMPASS_Heatmap_All_Stims.png"),
      width=900, height=2000)
  plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames)],
                         row_annotation = c("Stim"),
                         variable = "Stim",
                         show_rownames = FALSE,
                         main = "CD4 and Not-CD4 COMPASS Runs",
                         fontsize = 14, fontsize_row = 13, fontsize_col = 13)
  dev.off()
}

FS/PFS across Stims

fs_pfs_df <- lapply(c(CD4RunNames, NotCD4RunNames, CD8RunNames), function(n) {
  as.data.frame(COMPASS::FunctionalityScore(crList[[n]])) %>%
    rownames_to_column("SAMPLE ID") %>% 
    rename(FS = `COMPASS::FunctionalityScore(crList[[n]])`) %>% 
    left_join(as.data.frame(COMPASS::PolyfunctionalityScore(crList[[n]])) %>%
      rownames_to_column("SAMPLE ID") %>% 
      rename(PFS = `COMPASS::PolyfunctionalityScore(crList[[n]])`),
      by = "SAMPLE ID") %>% 
    mutate(COMPASS_run = n)
  } ) %>% 
  data.table::rbindlist() %>% 
  separate(COMPASS_run, into = c("parent", "STIM"), remove = F, extra = "merge") %>% 
  mutate(STIM = factor(STIM, levels = c("Spike_1", "Spike_2", "NCAP", "VEMP")))#,
         #`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))

table(fs_pfs_df$STIM, fs_pfs_df$`SAMPLE ID`) # remove 551432/07B for quade.test in order to have complete block design
##          
##           102C 103C 106C 109C 112C 113C 117C 121C 122C 127C 128C 12C 13 130C
##   Spike_1    3    3    3    3    3    3    3    3    3    3    3   3  3    3
##   Spike_2    3    3    3    3    3    3    3    3    3    3    3   3  3    3
##   NCAP       3    3    3    3    3    3    3    3    3    3    3   3  3    3
##   VEMP       3    3    3    3    3    3    3    3    3    3    3   3  3    3
##          
##           131C 133C 136C 139C 141C 142C 143C 147C 148C 150C 15514 15518 15529
##   Spike_1    3    3    3    3    3    3    3    3    3    3     3     3     3
##   Spike_2    3    3    3    3    3    3    3    3    3    3     3     3     3
##   NCAP       3    3    3    3    3    3    3    3    3    3     3     3     3
##   VEMP       3    3    3    3    3    3    3    3    3    3     3     3     3
##          
##           15530 15545 15548 15554 15575 157C 159C 161C 162C 164C 169C 170C 1C
##   Spike_1     3     3     3     3     3    3    3    3    3    3    3    3  3
##   Spike_2     2     3     2     3     3    3    3    3    3    3    3    3  3
##   NCAP        3     3     2     3     3    3    3    3    3    3    3    3  3
##   VEMP        3     3     3     3     3    3    3    3    3    3    3    3  3
##          
##           21C 23 25 25C 29C 36C 38C 51C 551432 56C 59C 69C 6C 76C 80C 90C 93C
##   Spike_1   3  3  3   3   3   3   3   3      3   3   3   3  3   3   3   3   3
##   Spike_2   3  3  3   3   3   3   3   3      0   3   3   3  3   3   3   3   3
##   NCAP      3  3  3   3   3   3   3   3      3   3   3   3  3   3   3   3   3
##   VEMP      3  3  3   3   3   3   3   3      3   3   3   3  3   3   3   3   3
##          
##           96C BWT20 CLT1 HS09 HS10 HS8
##   Spike_1   3     3    3    3    3   3
##   Spike_2   3     2    3    3    3   3
##   NCAP      3     2    3    3    3   3
##   VEMP      3     3    3    3    3   3
fs_pfs_df.complete <- fs_pfs_df %>%
             dplyr::filter(`SAMPLE ID` != "551432") %>% 
  arrange(STIM, `SAMPLE ID`)
fs_pfs_df.complete.4 <- fs_pfs_df.complete %>% dplyr::filter(parent == "4") %>% 
             mutate(`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))
fs_pfs_df.complete.not4 <- fs_pfs_df.complete %>% dplyr::filter(parent == "NOT4") %>% 
             mutate(`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))
# And for CD8 need to remove more individuals for complete block design
fs_pfs_df.complete.8 <- fs_pfs_df.complete %>% dplyr::filter(parent == "8") %>% 
  dplyr::filter(!(`SAMPLE ID` %in% c("BWT20", "15548", "15530"))) %>% 
             mutate(`SAMPLE ID` = factor(`SAMPLE ID`, levels = unique(`SAMPLE ID`)))

# Perform statistical tests for Functionality Score across Stims
# Quade test is to wilcoxon signed rank test like ANOVA is to t-test
quade_fs_4 <- quade.test(FS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.4)
quade_fs_4
## 
##  Quade test
## 
## data:  FS and STIM and SAMPLE ID
## Quade F = 78.717, num df = 3, denom df = 183, p-value < 2.2e-16
quade_fs_not4 <- quade.test(FS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.not4)
quade_fs_not4
## 
##  Quade test
## 
## data:  FS and STIM and SAMPLE ID
## Quade F = 45.948, num df = 3, denom df = 183, p-value < 2.2e-16
quade_fs_8 <- quade.test(FS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.8)
quade_fs_8
## 
##  Quade test
## 
## data:  FS and STIM and SAMPLE ID
## Quade F = 15.461, num df = 3, denom df = 174, p-value = 5.834e-09
pw_fs_4 <- pairwise.wilcox.test(fs_pfs_df.complete.4$FS,
                      fs_pfs_df.complete.4$STIM,
                      p.adjust.method="bonferroni",
                      paired=TRUE)
pw_fs_4$p.value
##              Spike_1      Spike_2         NCAP
## Spike_2 1.151773e-01           NA           NA
## NCAP    2.593592e-03 3.446827e-07           NA
## VEMP    6.244948e-11 4.659364e-11 5.395379e-11
broom::tidy(pw_fs_4)
## # A tibble: 6 x 3
##   group1  group2   p.value
##   <chr>   <chr>      <dbl>
## 1 Spike_2 Spike_1 1.15e- 1
## 2 NCAP    Spike_1 2.59e- 3
## 3 VEMP    Spike_1 6.24e-11
## 4 NCAP    Spike_2 3.45e- 7
## 5 VEMP    Spike_2 4.66e-11
## 6 VEMP    NCAP    5.40e-11
pw_fs_not4 <- pairwise.wilcox.test(fs_pfs_df.complete.not4$FS,
                      fs_pfs_df.complete.not4$STIM,
                      p.adjust.method="bonferroni",
                      paired=TRUE)
pw_fs_not4$p.value
##              Spike_1      Spike_2         NCAP
## Spike_2 3.230971e-01           NA           NA
## NCAP    1.963071e-08 1.163716e-09           NA
## VEMP    5.273013e-05 1.342156e-02 1.717125e-10
broom::tidy(pw_fs_not4)
## # A tibble: 6 x 3
##   group1  group2   p.value
##   <chr>   <chr>      <dbl>
## 1 Spike_2 Spike_1 3.23e- 1
## 2 NCAP    Spike_1 1.96e- 8
## 3 VEMP    Spike_1 5.27e- 5
## 4 NCAP    Spike_2 1.16e- 9
## 5 VEMP    Spike_2 1.34e- 2
## 6 VEMP    NCAP    1.72e-10
pw_fs_8 <- pairwise.wilcox.test(fs_pfs_df.complete.8$FS,
                      fs_pfs_df.complete.8$STIM,
                      p.adjust.method="bonferroni",
                      paired=TRUE)
pw_fs_8$p.value
##              Spike_1      Spike_2 NCAP
## Spike_2 1.371785e-01           NA   NA
## NCAP    6.151116e-05 2.024740e-03   NA
## VEMP    5.502367e-06 4.915498e-07    1
broom::tidy(pw_fs_8)
## # A tibble: 6 x 3
##   group1  group2      p.value
##   <chr>   <chr>         <dbl>
## 1 Spike_2 Spike_1 0.137      
## 2 NCAP    Spike_1 0.0000615  
## 3 VEMP    Spike_1 0.00000550 
## 4 NCAP    Spike_2 0.00202    
## 5 VEMP    Spike_2 0.000000492
## 6 VEMP    NCAP    1
# Perform statistical tests for PolyFunctionality Score across Stims
quade_pfs_4 <- quade.test(PFS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.4)
quade_pfs_4
## 
##  Quade test
## 
## data:  PFS and STIM and SAMPLE ID
## Quade F = 63.08, num df = 3, denom df = 183, p-value < 2.2e-16
quade_pfs_not4 <- quade.test(PFS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.not4)
quade_pfs_not4
## 
##  Quade test
## 
## data:  PFS and STIM and SAMPLE ID
## Quade F = 34.079, num df = 3, denom df = 183, p-value < 2.2e-16
quade_pfs_8 <- quade.test(PFS ~ STIM | `SAMPLE ID`, data = fs_pfs_df.complete.8)
quade_pfs_8
## 
##  Quade test
## 
## data:  PFS and STIM and SAMPLE ID
## Quade F = 15.807, num df = 3, denom df = 174, p-value = 3.91e-09
pw_pfs_4 <- pairwise.wilcox.test(fs_pfs_df.complete.4$PFS,
                      fs_pfs_df.complete.4$STIM,
                      p.adjust.method="bonferroni",
                      paired=TRUE)
pw_pfs_4$p.value
##              Spike_1      Spike_2         NCAP
## Spike_2 1.574931e-01           NA           NA
## NCAP    1.000000e+00 2.378113e-05           NA
## VEMP    6.882751e-11 4.659364e-11 5.948135e-11
# broom::tidy(pw_pfs_4)

pw_pfs_not4 <- pairwise.wilcox.test(fs_pfs_df.complete.not4$PFS,
                      fs_pfs_df.complete.not4$STIM,
                      p.adjust.method="bonferroni",
                      paired=TRUE)
pw_pfs_not4$p.value
##              Spike_1      Spike_2         NCAP
## Spike_2 1.000000e+00           NA           NA
## NCAP    6.620103e-08 9.866667e-09           NA
## VEMP    4.440744e-02 1.632616e-01 1.014449e-09
# broom::tidy(pw_pfs_not4)

pw_pfs_8 <- pairwise.wilcox.test(fs_pfs_df.complete.8$PFS,
                      fs_pfs_df.complete.8$STIM,
                      p.adjust.method="bonferroni",
                      paired=TRUE)
pw_pfs_8$p.value
##              Spike_1      Spike_2 NCAP
## Spike_2 5.165442e-03           NA   NA
## NCAP    1.307259e-04 1.192973e-01   NA
## VEMP    2.420840e-07 4.915498e-07    1
# broom::tidy(pw_pfs_8)

# Now plot the results
# Note that the dataset used in statistics excludes 551432, but 551432 is plotted below (and the median bar is calculated including 551432)
# Same goes for "BWT20", "15548", "15530" in CD8 tests

parent.labs <- c("CD4", "Not-CD4", "CD8")
names(parent.labs) <- c("4", "NOT4", "8")

plot_fs_pfs_vs_stim <- function(fs_or_pfs) {
  ggplot(fs_pfs_df,
         aes(STIM, !!as.name(fs_or_pfs))) +
    theme_bw(base_size = 22) +
    geom_line(aes(group = `SAMPLE ID`), color="grey") +
    geom_point(color="grey") +
    facet_grid(. ~ parent,
               labeller = labeller(parent = parent.labs)) +
    geom_errorbarh(data = fs_pfs_df %>% group_by(parent, STIM) %>% summarise(med = median(!!as.name(fs_or_pfs))),
                   aes(y = med,
                       xmax = 1.5 + 0.35,
                       xmin = 1.5 - 0.35,
                       height = 0),
                   position=position_dodge(width=0), color = "black") +
    labs(title = sprintf("%s vs Stim", fs_or_pfs),
         y = fs_or_pfs) +
    theme(axis.title.x = element_blank(),
          axis.title.y = element_text(size=22),
          axis.text.y = element_text(color="black", size=22),
          axis.text.x = element_text(color="black", size=22),
          plot.title = element_text(hjust = 0.5),
          panel.grid = element_blank(),
          legend.position = "none",
          plot.margin = margin(1.3, 0.2, 0, 0.2, "cm")) +
    scale_x_discrete(labels=c(c("Spike_1" = "Spike 1", "Spike_2" = "Spike 2")),
                     expand = c(0.1,0.1))
}

plot_fs_pfs_vs_stim("FS")

# Remembering these p-values are adjusted
as.data.frame(pw_fs_4$p.value) %>% 
  rownames_to_column('tmp') %>% 
  mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
  column_to_rownames('tmp') %>% 
  kable(format = "html", escape = F, row.names = T) %>%
  kable_styling("striped", full_width = F)
Spike_1 Spike_2 NCAP
Spike_2 0.115177317170495 NA NA
NCAP 0.00259359228882635 3.4468269894795e-07 NA
VEMP 6.24494803673655e-11 4.65936369334732e-11 5.39537888428502e-11
as.data.frame(pw_fs_not4$p.value) %>% 
  rownames_to_column('tmp') %>% 
  mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
  column_to_rownames('tmp') %>% 
  kable(format = "html", escape = F, row.names = T) %>%
  kable_styling("striped", full_width = F)
Spike_1 Spike_2 NCAP
Spike_2 0.323097099628218 NA NA
NCAP 1.96307141898972e-08 1.16371623364348e-09 NA
VEMP 5.27301309207659e-05 0.0134215615062708 1.717125234786e-10
as.data.frame(pw_fs_8$p.value) %>% 
  rownames_to_column('tmp') %>% 
  mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
  column_to_rownames('tmp') %>% 
  kable(format = "html", escape = F, row.names = T) %>%
  kable_styling("striped", full_width = F)
Spike_1 Spike_2 NCAP
Spike_2 0.137178525432516 NA NA
NCAP 6.15111576317468e-05 0.00202474006060593 NA
VEMP 5.50236697392487e-06 4.91549764427344e-07 1
plot_fs_pfs_vs_stim("PFS")

as.data.frame(pw_pfs_4$p.value) %>% 
  rownames_to_column('tmp') %>% 
  mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
  column_to_rownames('tmp') %>% 
  kable(format = "html", escape = F, row.names = T) %>%
  kable_styling("striped", full_width = F)
Spike_1 Spike_2 NCAP
Spike_2 0.157493084727601 NA NA
NCAP 1 2.37811284128217e-05 NA
VEMP 6.88275106455714e-11 4.65936369334732e-11 5.948134864697e-11
as.data.frame(pw_pfs_not4$p.value) %>% 
  rownames_to_column('tmp') %>% 
  mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
  column_to_rownames('tmp') %>% 
  kable(format = "html", escape = F, row.names = T) %>%
  kable_styling("striped", full_width = F)
Spike_1 Spike_2 NCAP
Spike_2 1 NA NA
NCAP 6.62010338999915e-08 9.86666729441403e-09 NA
VEMP 0.0444074361462703 0.163261630177697 1.01444884747321e-09
as.data.frame(pw_pfs_8$p.value) %>% 
  rownames_to_column('tmp') %>% 
  mutate_all(~cell_spec(.x, color = ifelse(!is.na(.x) & .x < 0.05, "red", "black"))) %>%
  column_to_rownames('tmp') %>% 
  kable(format = "html", escape = F, row.names = T) %>%
  kable_styling("striped", full_width = F)
Spike_1 Spike_2 NCAP
Spike_2 0.00516544163845711 NA NA
NCAP 0.000130725943852043 0.119297317838269 NA
VEMP 2.42083959561873e-07 4.91549764427344e-07 1

As we saw in the heatmaps, NCAP and Spike 1 consistently have the highest FS/PFS scores, and VEMP has the lowest.

How correlated are FS/PFS across stims?

fs_pfs_df_wide <- fs_pfs_df %>%  
               pivot_wider(id_cols = c(`SAMPLE ID`),
                           names_from = c(STIM, parent),
                           values_from = c(FS, PFS))

pairs.panels(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("^FS.*_4")), 
             method = "pearson", # correlation method, "r" displayed in upper off diagonals. Not r^2.
             hist.col = "#00AFBB",
             density = TRUE,  # show density plots,
             lm = T,
             main = "CD4 FS Correlation across Stims")

Write out the FS scores to a file for further integration w/ other T cell and Ab data

if(save_output) {
  fs_dat_2save <- fs_pfs_df_wide %>% 
    rename("SAMPLE_ID" = "SAMPLE ID") %>% 
    dplyr::select("SAMPLE_ID" | matches("^FS.*_4") | matches("^FS.*_8")) %>% 
    rename_all(~ sub("Spike_", "S", .)) %>% 
    rename_at(vars(-"SAMPLE_ID"), ~ sub("FS_(.*)_(.*)", "CD\\2_\\1_FS", .)) %>% 
    # Convert the numeric columns to character type after rounding to 20 digits. This will help ensure consistency when writing and reading the data to a file
    mutate_at(vars(-"SAMPLE_ID"), ~ format(., digits = 20))
  
  write.csv(fs_dat_2save,
            here::here("processed_data/20200814_HAARVI_COMPASS_FS.csv"), row.names = F)
  # fs_dat_2save <- read.csv(here::here("processed_data/20200814_HAARVI_COMPASS_FS.csv"), stringsAsFactors = F, check.names = F, colClasses = "character")
  # all.equal(fs_dat_2save,
  #  read.csv(here::here("processed_data/20200814_HAARVI_COMPASS_FS.csv"), stringsAsFactors = F, check.names = F, colClasses = "character"))
}
# Investigate the odd dichotomization of FS_VEMP_4 values:
FS_VEMP_4_dat <- as.data.frame((COMPASS::FunctionalityScore(crList$`4_VEMP`))) %>%
  rownames_to_column("SAMPLE ID") %>%
  rename(`4_VEMP_FS` = "(COMPASS::FunctionalityScore(crList$`4_VEMP`))") %>% 
  left_join(crList$`4_VEMP`$data$meta %>% dplyr::select(`SAMPLE ID`, Batch, Cohort, Age, Sex, Race_v2, `Days symptom onset to visit 1`, `$DATE`),
            by = "SAMPLE ID") %>% 
  left_join(crList$`4_VEMP`$fit$mean_gamma %>% as.data.frame() %>% rownames_to_column("SAMPLE ID"),
            by = "SAMPLE ID") %>% 
  mutate(Batch = factor(ifelse(`$DATE` == "05-JUN-2020", 3, Batch)),
         Cohort = factor(ifelse(Cohort %in%
                                                c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
                                              levels = c("Healthy", "Non-hospitalized", "Hospitalized")),
         `Days symptom onset to visit 1` = as.numeric(`Days symptom onset to visit 1`),
         Race_v2 = factor(ifelse(Race_v2 %in%
                                                c(NA, "N/A"), NA, Race_v2),
                                              levels = c("American Indian or Alaska Native", "Asian", "White", NA)))
## Warning: NAs introduced by coercion
# It has everything to do with the IL4/5/13 only subset
ggplot(FS_VEMP_4_dat, aes(`!IL2&IL4/5/13&!IFNg&!TNFa&!IL17a&!CD154&!CD107a`, `4_VEMP_FS`)) +
  geom_point() +
  labs(x = "IL4/5/13 only subset mean_gamma")

# But is there another metadata variable which explains the IL4/5/13 dichotomy?
ggplot(FS_VEMP_4_dat, aes(Batch, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1)

ggplot(FS_VEMP_4_dat, aes(Cohort, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  theme(axis.text.x = element_text(size=10, angle = 20))

ggplot(FS_VEMP_4_dat, aes(Race_v2, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  theme(axis.text.x = element_text(size=10, angle = 20))

ggplot(FS_VEMP_4_dat, aes(Age, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_point()
## Warning: Removed 3 rows containing missing values (geom_point).

ggplot(FS_VEMP_4_dat, aes(`Days symptom onset to visit 1`, `4_VEMP_FS`)) +
  theme_bw(base_size = 20) +
  geom_point()
## Warning: Removed 6 rows containing missing values (geom_point).

FS_VEMP_4_dat %>% dplyr::filter(`4_VEMP_FS` > 0.013) %>% dplyr::select(`SAMPLE ID`, Batch, Cohort, `4_VEMP_FS`) %>% arrange(Batch, Cohort, `SAMPLE ID`)
##    SAMPLE ID Batch           Cohort  4_VEMP_FS
## 1       148C     1 Non-hospitalized 0.01574803
## 2        25C     1 Non-hospitalized 0.01580827
## 3        51C     1 Non-hospitalized 0.01574744
## 4      15575     1     Hospitalized 0.01582126
## 5       109C     2 Non-hospitalized 0.01574803
## 6       128C     2 Non-hospitalized 0.01574803
## 7       157C     2 Non-hospitalized 0.01574803
## 8        29C     2 Non-hospitalized 0.01574980
## 9        69C     2 Non-hospitalized 0.01584528
## 10        13     2     Hospitalized 0.01574803
## 11      142C     3 Non-hospitalized 0.01575591
## 12       96C     3 Non-hospitalized 0.01589272
## 13      121C     3     Hospitalized 0.01710177
## 14        23     3     Hospitalized 0.02362205
# These points are in each Batch and are not restricted to a Cohort

Continue on with the correlatiohn plots

pairs.panels(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("^FS.*_NOT4")), 
             method = "pearson", # correlation method, "r" displayed in upper off diagonals. Not r^2.
             hist.col = "#00AFBB",
             density = TRUE,  # show density plots,
             lm = T,
             main = "Not-CD4 FS Correlation across Stims")

pairs.panels(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("^FS.*_8")), 
             method = "pearson", # correlation method, "r" displayed in upper off diagonals. Not r^2.
             hist.col = "#00AFBB",
             density = TRUE,  # show density plots,
             lm = T,
             main = "CD8 FS Correlation across Stims")

pairs.panels(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("^PFS.*_4")), 
             method = "pearson", # correlation method, "r" displayed in upper off diagonals. Not r^2.
             hist.col = "#00AFBB",
             density = TRUE,  # show density plots,
             lm = T,
             main = "CD4 FS Correlation across Stims")

pairs.panels(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("^PFS.*_NOT4")), 
             method = "pearson", # correlation method, "r" displayed in upper off diagonals. Not r^2.
             hist.col = "#00AFBB",
             density = TRUE,  # show density plots,
             lm = T,
             main = "Not-CD4 PFS Correlation across Stims")

pairs.panels(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("^PFS.*_8")), 
             method = "pearson", # correlation method, "r" displayed in upper off diagonals. Not r^2.
             hist.col = "#00AFBB",
             density = TRUE,  # show density plots,
             lm = T,
             main = "CD8 FS Correlation across Stims")

Matrix of the p-value of the correlation. First for FS and then PFS. Ignoring CD8 run for now.

M <- cor(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(starts_with("FS") & !ends_with("_8")),
         use = "complete.obs") # missing values are handled by casewise deletion
# mat : is a matrix of data
# ... : further arguments to pass to the native R cor.test function
cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat<- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
getOption("na.action") # Default behavior of cor.test
## [1] "na.omit"
# matrix of the p-value of the correlation
p.mat <- cor.mtest(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("FS")))
head(p.mat[, 1:5])
##                 FS_Spike_1_4 FS_Spike_2_4    FS_NCAP_4  FS_VEMP_4
## FS_Spike_1_4    0.000000e+00 4.276056e-09 2.043326e-10 0.02435271
## FS_Spike_2_4    4.276056e-09 0.000000e+00 2.101399e-06 0.09718621
## FS_NCAP_4       2.043326e-10 2.101399e-06 0.000000e+00 0.02084563
## FS_VEMP_4       2.435271e-02 9.718621e-02 2.084563e-02 0.00000000
## FS_Spike_1_NOT4 1.935413e-09 3.683025e-03 1.358630e-03 0.02271681
## FS_Spike_2_NOT4 4.953304e-01 1.490506e-03 3.459515e-01 0.75480522
##                 FS_Spike_1_NOT4
## FS_Spike_1_4       1.935413e-09
## FS_Spike_2_4       3.683025e-03
## FS_NCAP_4          1.358630e-03
## FS_VEMP_4          2.271681e-02
## FS_Spike_1_NOT4    0.000000e+00
## FS_Spike_2_NOT4    7.133118e-01
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(M,
         method="color", col=col(200),  
         type="upper", order="hclust", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.col="black", tl.srt=45, #Text label color and rotation
         # Combine with significance
         p.mat = p.mat, sig.level = 0.01, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag=FALSE)

# significant coefficients are colored
M <- cor(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("PFS") & !ends_with("_8")),
         use = "complete.obs") # missing values are handled by casewise deletion

# matrix of the p-value of the correlation
p.mat <- cor.mtest(fs_pfs_df_wide %>% 
               dplyr::select(-c(`SAMPLE ID`)) %>% 
               dplyr::select(matches("PFS")))
head(p.mat[, 1:5])
##                  PFS_Spike_1_4 PFS_Spike_2_4   PFS_NCAP_4 PFS_VEMP_4
## PFS_Spike_1_4     0.000000e+00  1.474830e-06 1.596741e-08 0.01244674
## PFS_Spike_2_4     1.474830e-06  0.000000e+00 1.847797e-05 0.07134184
## PFS_NCAP_4        1.596741e-08  1.847797e-05 0.000000e+00 0.01689591
## PFS_VEMP_4        1.244674e-02  7.134184e-02 1.689591e-02 0.00000000
## PFS_Spike_1_NOT4  2.137303e-09  9.457649e-03 3.115496e-03 0.01866709
## PFS_Spike_2_NOT4  7.916413e-01  1.660195e-04 4.226240e-01 0.63973316
##                  PFS_Spike_1_NOT4
## PFS_Spike_1_4        2.137303e-09
## PFS_Spike_2_4        9.457649e-03
## PFS_NCAP_4           3.115496e-03
## PFS_VEMP_4           1.866709e-02
## PFS_Spike_1_NOT4     0.000000e+00
## PFS_Spike_2_NOT4     7.268535e-01
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(M,
         method="color", col=col(200),  
         type="upper", order="hclust", 
         addCoef.col = "black", # Add coefficient of correlation
         tl.col="black", tl.srt=45, #Text label color and rotation
         # Combine with significance
         p.mat = p.mat, sig.level = 0.01, insig = "blank", 
         # hide correlation coefficient on the principal diagonal
         diag=FALSE)

# significant coefficients are colored

Observations:
(1) Note that FS and PFS scores for the same condition tend to be highly correlated (not shown). Not surprising.
(2) Within PFS only, there is a mixed bag of significant associations occuring “between CD4 and Not-CD4 for the same condition” and “between different stims”. Tends to be much lower r than those for (1) though.

In general, we can’t rely on a particular condition to predict the FS/PFS of all the other conditions. Each STIM and co-receptor subset provides different information. (If the former were true, it might have simplified our upcoming analyses)

Regress FS/PFS vs Demographics

For this next part, drop the healthies

# Read in the patient manifest with complete data (though it's also in the COMPASSResult object)
data_manifest <- read.csv(here::here("data/Seshadri_HAARVI_PBMC_manifest_merged_11June2020.csv"), check.names = F, stringsAsFactors = F)

setdiff(sort(unique(fs_pfs_df$`SAMPLE ID`)), sort(unique(data_manifest$`Record ID`)))
## [1] "BWT20" "CLT1"  "HS09"
setdiff(sort(unique(data_manifest$`Record ID`)), sort(unique(fs_pfs_df$`SAMPLE ID`)))
## [1] "116C" "37C"  "75C"  "HS9"
# Remember 116C and 37C didn't get run through COMPASS
# 75C was not run at all
# 07B/551432 didn't get run for Spike 2
# For CD8 only: "BWT20", "15548" didn't get run for "Spike 2", "NCAP", and 15530 didn't get run for "Spike 2"
fs_pfs_df_with_manifest <- fs_pfs_df %>%
  mutate("Record ID" = dplyr::recode(`SAMPLE ID`,
                                     "HS09" = "HS9")) %>% # also, 75C did not get run
  full_join(data_manifest, by = c("Record ID" = "Record ID")) %>% 
  dplyr::filter(!(`Record ID` %in% c("116C", "37C", "75C"))) %>% 
  mutate(Days_Symptom_Onset_to_Visit_1 = as.numeric(`Days symptom onset to visit 1`),
         Cohort = factor(ifelse(Cohort %in%
                                                c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
                                              levels = c("Healthy", "Non-hospitalized", "Hospitalized"))) %>% 
  # Decided to drop the healthies anyway
  dplyr::filter(Cohort != "Healthy")
## Warning: NAs introduced by coercion
unique(fs_pfs_df_with_manifest$Cohort)
## [1] Hospitalized     Non-hospitalized
## Levels: Healthy Non-hospitalized Hospitalized
fs_pfs_df_with_manifest %>% dplyr::filter(is.na(FS) | is.na(`Record ID`) | is.na(`SAMPLE ID`) | is.na(STIM) | is.na(Cohort))
##  [1] SAMPLE ID                     FS                           
##  [3] PFS                           COMPASS_run                  
##  [5] parent                        STIM                         
##  [7] Record ID                     Sample ID                    
##  [9] Collection date               Cell count                   
## [11] Cohort                        Age                          
## [13] Sex                           Race                         
## [15] Hispanic?                     Days symptom onset to visit 1
## [17] Pair ID                       Race_v2                      
## [19] Days_Symptom_Onset_to_Visit_1
## <0 rows> (or 0-length row.names)

FS/PFS vs Age

# Note that p-values on ggscatter plots below are *not* adjusted, e.g. compare below results to this unadjusted p-value:
broom::tidy(lm(FS ~ Age, data = fs_pfs_df_with_manifest %>% dplyr::filter(parent == "4" & STIM == "Spike_2")))
## # A tibble: 2 x 5
##   term         estimate std.error statistic      p.value
##   <chr>           <dbl>     <dbl>     <dbl>        <dbl>
## 1 (Intercept) 0.0496     0.00755      6.57  0.0000000190
## 2 Age         0.0000549  0.000135     0.408 0.685
ggscatter(fs_pfs_df_with_manifest,
          x = "Age", y = "FS",
          color = "STIM", palette = "jco",
          add = "reg.line") +
  facet_grid(parent ~ STIM) +
  stat_cor()
## `geom_smooth()` using formula 'y ~ x'

ggscatter(fs_pfs_df_with_manifest,
          x = "Age", y = "PFS",
          color = "STIM", palette = "jco",
          add = "reg.line") +
  facet_grid(parent ~ STIM) +
  stat_cor()
## `geom_smooth()` using formula 'y ~ x'

FS/PFS vs Days from Symptom Onset to Visit

# Note that p-values on ggscatter plots below are *not* adjusted, e.g.:
broom::tidy(lm(FS ~ Days_Symptom_Onset_to_Visit_1, data = fs_pfs_df_with_manifest %>% dplyr::filter(parent == "4" & STIM == "Spike_2")))
## # A tibble: 2 x 5
##   term                           estimate std.error statistic  p.value
##   <chr>                             <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)                   0.0493     0.00648      7.60  3.88e-10
## 2 Days_Symptom_Onset_to_Visit_1 0.0000661  0.000124     0.534 5.95e- 1
# Note that the Healthies don't have values for Days_Symptom_Onset_to_Visit_1 and will get filtered out below
ggscatter(fs_pfs_df_with_manifest,
          x = "Days_Symptom_Onset_to_Visit_1", y = "FS",
          color = "STIM", palette = "jco",
          add = "reg.line") +
  facet_grid(parent ~ STIM) +
  stat_cor()
## `geom_smooth()` using formula 'y ~ x'

ggscatter(fs_pfs_df_with_manifest,
          x = "Days_Symptom_Onset_to_Visit_1", y = "PFS",
          color = "STIM", palette = "jco",
          add = "reg.line") +
  facet_grid(parent ~ STIM) +
  stat_cor()
## `geom_smooth()` using formula 'y ~ x'

For the CD4 FS vs Age plots, there is a positive association of Age with FS for Spike 1 (at least).

FS/PFS vs Sex

# Note that p-values on ggboxplot plots below are *not* adjusted, e.g.:
broom::tidy(wilcox.test(FS ~ Sex, data = fs_pfs_df_with_manifest %>% dplyr::filter(parent == "4" & STIM == "Spike_2")))
## # A tibble: 1 x 4
##   statistic p.value method                 alternative
##       <dbl>   <dbl> <chr>                  <chr>      
## 1       335   0.269 Wilcoxon rank sum test two.sided
# Note that future ggpubr update should make outlier.shape=NA occur by default when jitter is added, but type out the parameter for now
ggboxplot(fs_pfs_df_with_manifest %>% 
            dplyr::filter(!is.na(Sex)),
          x = "Sex", y = "FS",
          color = "Sex", palette = "jco",
          add = "jitter",
          outlier.shape = NA) + 
  facet_grid(parent ~ STIM) +
  stat_compare_means(aes(group = Sex))

ggboxplot(fs_pfs_df_with_manifest %>% 
            dplyr::filter(!is.na(Sex)),
          x = "Sex", y = "PFS",
          color = "Sex", palette = "jco",
          add = "jitter",
          outlier.shape = NA) + 
  facet_grid(parent ~ STIM) +
  stat_compare_means(aes(group = Sex))

In many of the boxplots above, Males are shifted slightly higher on the FS/PFS axis compared to Females, but none of the comparisons are significant. Perhaps a t-test or un-stratified plot would be significant.

Hosp vs Non-Hosp

  1. Plot the COMPASS heatmaps again, this time including row annotations for Cohort
  2. Compare FS and PFS between Hosp and Non-Hosp

COMPASS Heatmaps

Look at the heatmaps again. Also focus on IFNg

Stack the plots from the different stims, this time with IFNg subsets on the RHS
A commonality to the subsets which are enriched in Hospitalized individuals is CD154.

# First change any NA Cohort levels to "Healthy"
crList2 <- lapply(crList,
                  function(cr) {
                    cr$data$meta$Cohort <- factor(ifelse(cr$data$meta$Cohort %in%
                                                c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", cr$data$meta$Cohort),
                                              levels = rev(c("Healthy", "Non-hospitalized", "Hospitalized")))
                    cr
                  })
names(crList2) <- names(crList)

CohortColors <- c("Healthy" = "#dfc27d", "Non-hospitalized" = "#80cdc1", "Hospitalized" = "#018571")
row_ann_colors <- list(`Cohort` = CohortColors)
cytokine_annotation_colors <- rep("black", 30)
cytokine_order_for_annotation = c("IFNg", "IL2", "TNFa", "CD154", "CD107a", "IL4/5/13", "IL17a")
# Adapt code from COMPASS::plotCompassResultStack to prepare the data for my_plot.COMPASSResult
# First we use mergeMatricesForPlotCOMPASSResultStack to merge the data from the different runs together.
# Then use the merged data to create a fake merged COMPASSResult object with the merged categories, metadata, and mean_gamma data frames
# This should allow us to use some of the customized options in my_plot.COMPASSResult (e.g. to get the IFNg subsets on the RHS)
mergeMatricesForPlotCOMPASSResultStack_tmp <- utils::getFromNamespace("mergeMatricesForPlotCOMPASSResultStack", "COMPASS")

CohortColors <- c("Hospitalized" = "#083b31", "Non-hospitalized" = "#018571", "Healthy" = "#80cdc1")
# CohortColors <- c("Hospitalized" = "#9da4ce", "Non-hospitalized" = "#c1ddd7", "Healthy" = "#ebeef0") # To match Ab data color scheme?
# CohortColors <- c("Hospitalized" = "#5c6494", "Non-hospitalized" = "#728a84", "Healthy" = "#ebeef0")
StimColors <- c("S1" = "#fc68be", "S2" = "#d94e09", "NCAP" = "#6B49F2", "VEMP" = "#D0F249")
row_ann_colors_v2 <- list(`Stim` = StimColors, `Cohort` = CohortColors)

# CD4 runs
cd4_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD4RunNames],
                                   threshold=0.01,
                                   minimum_dof=1,
                                   maximum_dof=Inf,
                                   row_annotation = c("Stim", "Cohort"),
                                   variable = "Stim")
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
if(save_output) {
  saveRDS(cd4_data2plot, here::here("processed_data/20200815_Merged_CD4_COMPASS_Data.rds"))
}
cd4_merged_COMPASSResult <- crList2$`4_Spike_1`
cd4_merged_COMPASSResult$fit$mean_gamma <- cd4_data2plot$MMerged
cd4_merged_COMPASSResult$fit$categories <- cd4_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd4_merged_COMPASSResult$fit$categories) <- rownames(cd4_data2plot$catsMerged)
cd4_merged_COMPASSResult$data$meta <- cd4_data2plot$rowannMerged
cd4_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd4_merged_COMPASSResult$data$meta)
cd4_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^4_", "", cd4_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd4_merged_COMPASSResult$data$meta$Cohort <- factor(cd4_merged_COMPASSResult$data$meta$Cohort, levels = c("Hospitalized", "Non-hospitalized", "Healthy"))
cd4_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd4_heatmap_by_ifng <- print(my_plot.COMPASSResult(cd4_merged_COMPASSResult,
                        y=c("Stim", "Cohort"),
                        fontsize=25, fontsize_row=5, fontsize_col=22,
                        row_annotation_colors=row_ann_colors_v2,
                        cytokine_annotation_colors=cytokine_annotation_colors,
                        cytokine_height_multiplier=1.6, order_by_max_functionality = T,
                        cytokine_order_for_annotation = c("CD154", "IL2", "TNFa", 
                                                          "CD107a", "IL4/5/13", "IL17a", "IFNg"),
                        dichotomize_by_cytokine = "IFNg",
                        dichotomize_by_cytokine_color = "#999999",
                        staircase_cytokine_annotation = TRUE,
                        main="CD4 COMPASS Runs"))
## The 'threshold' filter has removed 0 categories:

## gTree[GRID.gTree.6338]
# CD8 runs
cd8_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD8RunNames],
                                   threshold=0.01,
                                   minimum_dof=1,
                                   maximum_dof=Inf,
                                   row_annotation = c("Stim", "Cohort"),
                                   variable = "Stim")
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
if(save_output) {
  saveRDS(cd8_data2plot, here::here("processed_data/20200815_Merged_CD8_COMPASS_Data.rds"))
}
cd8_merged_COMPASSResult <- crList2$`8_Spike_1`
cd8_merged_COMPASSResult$fit$mean_gamma <- cd8_data2plot$MMerged
cd8_merged_COMPASSResult$fit$categories <- cd8_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd8_merged_COMPASSResult$fit$categories) <- rownames(cd8_data2plot$catsMerged)
cd8_merged_COMPASSResult$data$meta <- cd8_data2plot$rowannMerged
cd8_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd8_merged_COMPASSResult$data$meta)
cd8_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^8_", "", cd8_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd8_merged_COMPASSResult$data$meta$Cohort <- factor(cd8_merged_COMPASSResult$data$meta$Cohort, levels = c("Hospitalized", "Non-hospitalized", "Healthy"))
cd8_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd8_heatmap_by_ifng <- print(my_plot.COMPASSResult(cd8_merged_COMPASSResult,
                        y=c("Stim", "Cohort"),
                        fontsize=25, fontsize_row=5, fontsize_col=22,
                        row_annotation_colors=row_ann_colors_v2,
                        cytokine_annotation_colors=cytokine_annotation_colors,
                        cytokine_height_multiplier=1.6, order_by_max_functionality = T,
                        cytokine_order_for_annotation = c("CD154", "IL2", "TNFa", 
                                                          "CD107a", "IL4/5/13", "IL17a", "IFNg"),
                        dichotomize_by_cytokine = "IFNg",
                        dichotomize_by_cytokine_color = "#999999",
                        staircase_cytokine_annotation = TRUE,
                        main="CD8 COMPASS Runs"))
## The 'threshold' filter has removed 0 categories:

## gTree[GRID.gTree.6647]
if(save_output) {
  # Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
  Arial <- Type1Font(family = "Arial",
                     metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                                 here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                                 here::here("data/Arial_afm/ArialMT-Italic.afm"),
                                 here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
  pdfFonts(Arial = Arial)
  
  pdf(file=here::here("out/PostCompassPlots/20200810_CD4_COMPASS_Heatmap.pdf"), width=12, height=8,
        onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica. pheatmap calls grid.text, which uses Arial by default.
  grid.draw(cd4_heatmap_by_ifng)
  dev.off()
  
  pdf(file=here::here("out/PostCompassPlots/20200810_CD8_COMPASS_Heatmap.pdf"), width=10, height=8,
        onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica. pheatmap calls grid.text, which uses Arial by default.
  grid.draw(cd8_heatmap_by_ifng)
  dev.off()
}

FS/PFS vs Cohort

# Note that p-values on ggboxplot plots below are *not* adjusted, e.g.:
broom::tidy(wilcox.test(FS ~ Cohort, data = fs_pfs_df_with_manifest %>% dplyr::filter(Cohort != "Healthy" & parent == "4" & STIM == "Spike_2")))
## # A tibble: 1 x 4
##   statistic  p.value method                 alternative
##       <dbl>    <dbl> <chr>                  <chr>      
## 1       152 0.000164 Wilcoxon rank sum test two.sided
# Note that future ggpubr update should make outlier.shape=NA occur by default when jitter is added, but include for now
ggboxplot(fs_pfs_df_with_manifest %>% 
            dplyr::filter(Cohort != "Healthy"),
          x = "Cohort", y = "FS",
          color = "Cohort", palette = "jco",
          add = "jitter",
          outlier.shape = NA) + 
  facet_grid(parent ~ STIM) +
  stat_compare_means(aes(group = Cohort)) +
  scale_x_discrete(labels=c("Non-hospitalized" = "Conv\nNon-Hosp", "Hospitalized" = "Conv\nHosp"))

ggboxplot(fs_pfs_df_with_manifest %>% 
            dplyr::filter(Cohort != "Healthy"),
          x = "Cohort", y = "PFS",
          color = "Cohort", palette = "jco",
          add = "jitter",
          outlier.shape = NA) + 
  facet_grid(parent ~ STIM) +
  stat_compare_means(aes(group = Cohort)) +
  scale_x_discrete(labels=c("Non-hospitalized" = "Conv\nNon-Hosp", "Hospitalized" = "Conv\nHosp"))

Background-corrected magnitudes

crList.no_healthy <- lapply(names(crList2),
                  function(cr_name) {
                    cr <- crList2[[cr_name]]
                    print(sprintf("Accessing %s", cr_name))
                    cr$data$meta <- cr$data$meta %>% 
                      # Filter out samples which weren't run through COMPASS
                      dplyr::filter(!(`SAMPLE ID` %in% setdiff(cr$data$meta$`SAMPLE ID`, rownames(cr$fit$mean_gamma))))
                    stopifnot(all.equal(rownames(cr$fit$mean_gamma), cr$data$meta$`SAMPLE ID`))
                    
                    # And make sure that the count data only includes counts for the samples which were run through COMPASS
                    stopifnot(all.equal(rownames(cr$data$n_s), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(rownames(cr$data$n_u), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(names(cr$data$counts_s), rownames(cr$fit$mean_gamma)))
                    stopifnot(all.equal(names(cr$data$counts_u), rownames(cr$fit$mean_gamma)))
                    
                    not_healthy_idx <- which(cr$data$meta$Cohort != "Healthy")
                    cr$data$meta <- cr$data$meta[not_healthy_idx,] %>% 
                      mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")))
                    cr$fit$mean_gamma <- cr$fit$mean_gamma[not_healthy_idx,]

                    cr$data$n_s <- cr$data$n_s[not_healthy_idx,]
                    cr$data$n_u <- cr$data$n_u[not_healthy_idx,]
                    cr$data$counts_s <- cr$data$counts_s[not_healthy_idx]
                    cr$data$counts_u <- cr$data$counts_u[not_healthy_idx]
                    
                    cr
                  })
## [1] "Accessing 4_VEMP"
## [1] "Accessing NOT4_VEMP"
## [1] "Accessing 8_VEMP"
## [1] "Accessing 4_Spike_1"
## [1] "Accessing NOT4_Spike_1"
## [1] "Accessing 8_Spike_1"
## [1] "Accessing 4_Spike_2"
## [1] "Accessing NOT4_Spike_2"
## [1] "Accessing 8_Spike_2"
## [1] "Accessing 4_NCAP"
## [1] "Accessing NOT4_NCAP"
## [1] "Accessing 8_NCAP"
names(crList.no_healthy) <- names(crList2)

source(here::here("scripts/20200604_Helper_Functions.R"))

# Arial is used by dotplot function
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
                   metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                               here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                               here::here("data/Arial_afm/ArialMT-Italic.afm"),
                               here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)

# Note p-values are bonferroni adjusted in dotplots
# Also note that some points are not shown in order to zoom in better to the bulk of the point
bg_corr_dotplots <- pmap(.l = list(c(CD4RunNames, NotCD4RunNames, CD8RunNames),
                                   rep(c("CD4+", "Not-CD4+", "CD8"), each = 4),
                                   list(c(-0.0013, 0.009), c(-0.0018, 0.0085), c(-0.002, 0.0048), NULL,
                                        c(-0.002, 0.0024), c(-0.0012, 0.004), c(-0.002, 0.005), NULL,
                                        c(-0.0025, 0.0065), NULL, c(-0.0015, 0.005), NULL),
                                   c(5, 5, 3, 5,
                                     5, 5, 5, 5,
                                     5, 5, 5, 5)),
                         .f = function(n, p, y, p_text_size) {
                           # "staircase effect" for categories df heatmap gets done automatically, unlike in my_plot.COMPASSResult
                           # Returned dotplot is cowplot
                           make_dotplot_for_COMPASS_run(crList.no_healthy[[n]], n, parentSubset = p,
                                                        return_output = T, current_ylim = y, include_0_line=T, 
                                                        cytokine_order_for_annotation=cytokine_order_for_annotation,
                                                        p_text_size=p_text_size)
                         })
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
names(bg_corr_dotplots) <- c(CD4RunNames, NotCD4RunNames, CD8RunNames)

if(save_output) {
  # Save the output of make_dotplot_for_COMPASS_run to disk so we can use the extracted data for follow-up analysis
  saveRDS(bg_corr_dotplots, here::here("processed_data/20200824_make_dotplot_for_COMPASS_run_list.rds"))
}

for(n in names(bg_corr_dotplots)) {
  print(plot_grid(ggplot() +
    theme_void() +
    geom_text(aes(0,0,label=n), size=10) +
    xlab(NULL),
    bg_corr_dotplots[[n]]$Dotplot,
    ncol=1, rel_heights = c(0.1, 1)))
}

sig_tests_tally <- lapply(names(bg_corr_dotplots), function(n) {length(which(bg_corr_dotplots[[n]]$Test_Results$p.adj < 0.05))})
names(sig_tests_tally) <- names(bg_corr_dotplots)
sig_tests_tally # make sure all significant comparisons were plotted and accounted for
## $`4_Spike_1`
## [1] 9
## 
## $`4_Spike_2`
## [1] 4
## 
## $`4_NCAP`
## [1] 5
## 
## $`4_VEMP`
## [1] 0
## 
## $NOT4_Spike_1
## [1] 2
## 
## $NOT4_Spike_2
## [1] 1
## 
## $NOT4_NCAP
## [1] 1
## 
## $NOT4_VEMP
## [1] 0
## 
## $`8_Spike_1`
## [1] 0
## 
## $`8_Spike_2`
## [1] 0
## 
## $`8_NCAP`
## [1] 0
## 
## $`8_VEMP`
## [1] 0

Save to disk: background-corrected magnitudes from subsets which were significantly different across hospitalization status. For integrated analysis with other T cell and Ab data.

if(save_output) {
  signif_bgcorr_dat_list <- lapply(c(CD4RunNames, CD8RunNames), function(runName) {
    signif_subset_names <- bg_corr_dotplots[[runName]]$Test_Results %>% dplyr::filter(p.adj < 0.05) %>% dplyr::pull(BooleanSubset)
    bg_corr_dotplots[[runName]]$BgCorrMagnitudes %>% 
      rename("SAMPLE_ID" = "Individual") %>% 
      dplyr::select(c("SAMPLE_ID", signif_subset_names)) %>% 
      rename_at(vars(signif_subset_names), ~ paste("CD", sub("Spike_", "S", runName), " ", ., sep = ""))
  })
  names(signif_bgcorr_dat_list) <- c(CD4RunNames, CD8RunNames)

  bgcorr_dat_2save <- purrr::reduce(signif_bgcorr_dat_list, full_join, by = "SAMPLE_ID") %>% 
    # Convert proportions into percents
    # Convert the numeric columns to character type after rounding to 20 digits. This will help ensure consistency when writing and reading the data to a file
    mutate_at(vars(-"SAMPLE_ID"), ~ format(. * 100, digits = 20))

  write.csv(bgcorr_dat_2save, here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), row.names = F)
  # bgcorr_dat_2save <- read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F)
  # all.equal(bgcorr_dat_2save %>% mutate_at(vars(-"SAMPLE_ID"), as.numeric),
  #  read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F))
}